-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[GlobalIsel] Combine G_UNMERGE_VALUES from opaque vectors into scalars #113040
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
%opaque:_(<2 x s64>) = G_OPAQUE
%un1:_(s64), %un2:_(s64) = G_UNMERGE_VALUES %opaque(<2 x s64>)
->
%zero:_(s64) = G_CONSTANT i64 0
%one:_(s64) = G_CONSTANT i64 1
%un1:_(s64) = G_EXTRACT_VECTOR_ELT %opaque, $zero
%un2:_(s64) = G_EXTRACT_VECTOR_ELT %opaque, $one
unable to legalize instruction: %5:_(s128) = G_EXTRACT_VECTOR_ELT %3:_(<2 x s128>), %7:_(s64) (in function: fabs_v2f128)
Test:
llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir
|
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-aarch64 Author: Thorsten Schütt (tschuett) Changes%opaque:_(<2 x s64>) = G_OPAQUE -> %zero:_(s64) = G_CONSTANT i64 0 unable to legalize instruction: %5:_(s128) = G_EXTRACT_VECTOR_ELT %3:_(<2 x s128>), %7:_(s64) (in function: fabs_v2f128) Test: Patch is 1.17 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113040.diff 94 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9240a3c3127eb4..87409c88788e6a 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -922,6 +922,10 @@ class CombinerHelper {
bool matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI,
BuildFnTy &MatchInfo);
+ // unmerge_values(opaque vector) -> extract vector elt
+ bool matchUnmergeValuesOfScalarAndVector(const MachineInstr &MI,
+ BuildFnTy &MatchInfo);
+
private:
/// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index ead4149fc11068..39dd58837d5750 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -840,6 +840,14 @@ def unmerge_anyext_build_vector : GICombineRule<
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])
>;
+// Transform unmerge opaque vector -> extract vector elt
+def unmerge_opaque_vector : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_UNMERGE_VALUES): $root,
+ [{ return Helper.matchUnmergeValuesOfScalarAndVector(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])
+>;
+
// Transform x,y = unmerge(zext(z)) -> x = zext z; y = 0.
def unmerge_zext_to_zext : GICombineRule<
(defs root:$d),
@@ -855,7 +863,8 @@ def merge_combines: GICombineGroup<[
unmerge_cst,
unmerge_undef,
unmerge_dead_to_trunc,
- unmerge_zext_to_zext
+ unmerge_zext_to_zext,
+ unmerge_opaque_vector
]>;
// Under certain conditions, transform:
diff --git a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
index af1717dbf76f39..a45024d120be68 100644
--- a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
+++ b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_component_library(LLVMGlobalISel
GlobalISel.cpp
Combiner.cpp
CombinerHelper.cpp
+ CombinerHelperArtifacts.cpp
CombinerHelperCasts.cpp
CombinerHelperCompares.cpp
CombinerHelperVectorOps.cpp
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index b7ddf9f479ef8e..f9b1621955c217 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -7611,85 +7611,3 @@ bool CombinerHelper::matchFoldAMinusC1PlusC2(const MachineInstr &MI,
return true;
}
-
-bool CombinerHelper::matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI,
- BuildFnTy &MatchInfo) {
- const GUnmerge *Unmerge = cast<GUnmerge>(&MI);
-
- if (!MRI.hasOneNonDBGUse(Unmerge->getSourceReg()))
- return false;
-
- const MachineInstr *Source = MRI.getVRegDef(Unmerge->getSourceReg());
-
- LLT DstTy = MRI.getType(Unmerge->getReg(0));
-
- // $bv:_(<8 x s8>) = G_BUILD_VECTOR ....
- // $any:_(<8 x s16>) = G_ANYEXT $bv
- // $uv:_(<4 x s16>), $uv1:_(<4 x s16>) = G_UNMERGE_VALUES $any
- //
- // ->
- //
- // $any:_(s16) = G_ANYEXT $bv[0]
- // $any1:_(s16) = G_ANYEXT $bv[1]
- // $any2:_(s16) = G_ANYEXT $bv[2]
- // $any3:_(s16) = G_ANYEXT $bv[3]
- // $any4:_(s16) = G_ANYEXT $bv[4]
- // $any5:_(s16) = G_ANYEXT $bv[5]
- // $any6:_(s16) = G_ANYEXT $bv[6]
- // $any7:_(s16) = G_ANYEXT $bv[7]
- // $uv:_(<4 x s16>) = G_BUILD_VECTOR $any, $any1, $any2, $any3
- // $uv1:_(<4 x s16>) = G_BUILD_VECTOR $any4, $any5, $any6, $any7
-
- // We want to unmerge into vectors.
- if (!DstTy.isFixedVector())
- return false;
-
- const GAnyExt *Any = dyn_cast<GAnyExt>(Source);
- if (!Any)
- return false;
-
- const MachineInstr *NextSource = MRI.getVRegDef(Any->getSrcReg());
-
- if (const GBuildVector *BV = dyn_cast<GBuildVector>(NextSource)) {
- // G_UNMERGE_VALUES G_ANYEXT G_BUILD_VECTOR
-
- if (!MRI.hasOneNonDBGUse(BV->getReg(0)))
- return false;
-
- // FIXME: check element types?
- if (BV->getNumSources() % Unmerge->getNumDefs() != 0)
- return false;
-
- LLT BigBvTy = MRI.getType(BV->getReg(0));
- LLT SmallBvTy = DstTy;
- LLT SmallBvElemenTy = SmallBvTy.getElementType();
-
- if (!isLegalOrBeforeLegalizer(
- {TargetOpcode::G_BUILD_VECTOR, {SmallBvTy, SmallBvElemenTy}}))
- return false;
-
- // We check the legality of scalar anyext.
- if (!isLegalOrBeforeLegalizer(
- {TargetOpcode::G_ANYEXT,
- {SmallBvElemenTy, BigBvTy.getElementType()}}))
- return false;
-
- MatchInfo = [=](MachineIRBuilder &B) {
- // Build into each G_UNMERGE_VALUES def
- // a small build vector with anyext from the source build vector.
- for (unsigned I = 0; I < Unmerge->getNumDefs(); ++I) {
- SmallVector<Register> Ops;
- for (unsigned J = 0; J < SmallBvTy.getNumElements(); ++J) {
- Register SourceArray =
- BV->getSourceReg(I * SmallBvTy.getNumElements() + J);
- auto AnyExt = B.buildAnyExt(SmallBvElemenTy, SourceArray);
- Ops.push_back(AnyExt.getReg(0));
- }
- B.buildBuildVector(Unmerge->getOperand(I).getReg(), Ops);
- };
- };
- return true;
- };
-
- return false;
-}
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperArtifacts.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperArtifacts.cpp
new file mode 100644
index 00000000000000..805d34ae0493c4
--- /dev/null
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperArtifacts.cpp
@@ -0,0 +1,169 @@
+//===- CombinerHelperArtifacts.cpp-----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements CombinerHelper for legalization artifacts.
+//
+//===----------------------------------------------------------------------===//
+//
+// G_UNMERGE_VALUES
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/Support/Casting.h"
+
+#define DEBUG_TYPE "gi-combiner"
+
+using namespace llvm;
+
+bool CombinerHelper::matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ const GUnmerge *Unmerge = cast<GUnmerge>(&MI);
+
+ if (!MRI.hasOneNonDBGUse(Unmerge->getSourceReg()))
+ return false;
+
+ const MachineInstr *Source = MRI.getVRegDef(Unmerge->getSourceReg());
+
+ LLT DstTy = MRI.getType(Unmerge->getReg(0));
+
+ // $bv:_(<8 x s8>) = G_BUILD_VECTOR ....
+ // $any:_(<8 x s16>) = G_ANYEXT $bv
+ // $uv:_(<4 x s16>), $uv1:_(<4 x s16>) = G_UNMERGE_VALUES $any
+ //
+ // ->
+ //
+ // $any:_(s16) = G_ANYEXT $bv[0]
+ // $any1:_(s16) = G_ANYEXT $bv[1]
+ // $any2:_(s16) = G_ANYEXT $bv[2]
+ // $any3:_(s16) = G_ANYEXT $bv[3]
+ // $any4:_(s16) = G_ANYEXT $bv[4]
+ // $any5:_(s16) = G_ANYEXT $bv[5]
+ // $any6:_(s16) = G_ANYEXT $bv[6]
+ // $any7:_(s16) = G_ANYEXT $bv[7]
+ // $uv:_(<4 x s16>) = G_BUILD_VECTOR $any, $any1, $any2, $any3
+ // $uv1:_(<4 x s16>) = G_BUILD_VECTOR $any4, $any5, $any6, $any7
+
+ // We want to unmerge into vectors.
+ if (!DstTy.isFixedVector())
+ return false;
+
+ const GAnyExt *Any = dyn_cast<GAnyExt>(Source);
+ if (!Any)
+ return false;
+
+ const MachineInstr *NextSource = MRI.getVRegDef(Any->getSrcReg());
+
+ if (const GBuildVector *BV = dyn_cast<GBuildVector>(NextSource)) {
+ // G_UNMERGE_VALUES G_ANYEXT G_BUILD_VECTOR
+
+ if (!MRI.hasOneNonDBGUse(BV->getReg(0)))
+ return false;
+
+ // FIXME: check element types?
+ if (BV->getNumSources() % Unmerge->getNumDefs() != 0)
+ return false;
+
+ LLT BigBvTy = MRI.getType(BV->getReg(0));
+ LLT SmallBvTy = DstTy;
+ LLT SmallBvElemenTy = SmallBvTy.getElementType();
+
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_BUILD_VECTOR, {SmallBvTy, SmallBvElemenTy}}))
+ return false;
+
+ // We check the legality of scalar anyext.
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_ANYEXT,
+ {SmallBvElemenTy, BigBvTy.getElementType()}}))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ // Build into each G_UNMERGE_VALUES def
+ // a small build vector with anyext from the source build vector.
+ for (unsigned I = 0; I < Unmerge->getNumDefs(); ++I) {
+ SmallVector<Register> Ops;
+ for (unsigned J = 0; J < SmallBvTy.getNumElements(); ++J) {
+ Register SourceArray =
+ BV->getSourceReg(I * SmallBvTy.getNumElements() + J);
+ auto AnyExt = B.buildAnyExt(SmallBvElemenTy, SourceArray);
+ Ops.push_back(AnyExt.getReg(0));
+ }
+ B.buildBuildVector(Unmerge->getOperand(I).getReg(), Ops);
+ };
+ };
+ return true;
+ };
+
+ return false;
+}
+
+bool CombinerHelper::matchUnmergeValuesOfScalarAndVector(const MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+
+ constexpr unsigned MAX_NUM_DEFS_LIMIT = 8;
+
+ // %opaque:_(<2 x s64>) = G_OPAQUE
+ // %un1:_(s64), %un2:_(s64) = G_UNMERGE_VALUES %opaque(<2 x s64>)
+ //
+ // ->
+ //
+ // %zero:_(s64) = G_CONSTANT i64 0
+ // %one:_(s64) = G_CONSTANT i64 1
+ // %un1:_(s64) = G_EXTRACT_VECTOR_ELT %opaque, $zero
+ // %un2:_(s64) = G_EXTRACT_VECTOR_ELT %opaque, $one
+
+ const GUnmerge *Unmerge = cast<GUnmerge>(&MI);
+
+ if (Unmerge->getNumDefs() > MAX_NUM_DEFS_LIMIT)
+ return false;
+
+ LLT DstTy = MRI.getType(Unmerge->getReg(0));
+ LLT SrcTy = MRI.getType(Unmerge->getSourceReg());
+
+ // We want to unmerge a vector into scalars.
+ if (!DstTy.isScalar() || !SrcTy.isFixedVector() || DstTy.getSizeInBits() > 64)
+ return false;
+
+ if (DstTy != SrcTy.getElementType())
+ return false;
+
+ // We want to unmerge from an opaque vector.
+ const MachineInstr *Source = MRI.getVRegDef(Unmerge->getSourceReg());
+ if (isa<GBuildVector>(Source))
+ return false;
+
+ unsigned PreferredVecIdxWidth =
+ getTargetLowering().getVectorIdxTy(getDataLayout()).getSizeInBits();
+
+ LLT IdxTy = LLT::scalar(PreferredVecIdxWidth);
+
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_EXTRACT_VECTOR_ELT, {DstTy, SrcTy, IdxTy}}))
+ return false;
+
+ if (!isConstantLegalOrBeforeLegalizer(IdxTy))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ for (unsigned I = 0; I < Unmerge->getNumDefs(); ++I) {
+ auto Index = B.buildConstant(IdxTy, I);
+ B.buildExtractVectorElement(Unmerge->getOperand(I).getReg(),
+ Unmerge->getSourceReg(), Index);
+ }
+ };
+
+ return true;
+}
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 8af8cdfeba6ac4..1eb7488e4ff570 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -322,7 +322,7 @@ def AArch64PostLegalizerCombiner
extractvecelt_pairwise_add, redundant_or,
mul_const, redundant_sext_inreg,
form_bitfield_extract, rotate_out_of_range,
- icmp_to_true_false_known_bits,
+ icmp_to_true_false_known_bits, vector_ops_combines,
select_combines, fold_merge_to_zext,
constant_fold_binops, identity_combines,
ptr_add_immed_chain, overlapping_and,
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir
index 7566d38e6c6cfa..fc7584a2e1b162 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir
@@ -422,9 +422,12 @@ body: |
; CHECK-LABEL: name: test_dont_combine_unmerge_zext_to_zext_src_vector
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $w0
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(<2 x s32>) = G_ZEXT [[COPY]](<2 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](<2 x s32>)
- ; CHECK-NEXT: $w0 = COPY [[UV]](s32)
- ; CHECK-NEXT: $w1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[ZEXT]](<2 x s32>), [[C]](s64)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[ZEXT]](<2 x s32>), [[C1]](s64)
+ ; CHECK-NEXT: $w0 = COPY [[EVEC]](s32)
+ ; CHECK-NEXT: $w1 = COPY [[EVEC1]](s32)
%0:_(<2 x s16>) = COPY $w0
%3:_(<2 x s32>) = G_ZEXT %0(<2 x s16>)
%1:_(s32),%2:_(s32) = G_UNMERGE_VALUES %3(<2 x s32>)
@@ -539,3 +542,98 @@ body: |
$q0 = COPY %un1(s128)
$q1 = COPY %un2(s128)
...
+
+# Check that we unmerge the opaque vector into extract vector elt
+---
+name: test_opaque_vector_scalar
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_opaque_vector_scalar
+ ; CHECK: %opaque:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: %un1:_(s64) = G_EXTRACT_VECTOR_ELT %opaque(<2 x s64>), [[C]](s64)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: %un2:_(s64) = G_EXTRACT_VECTOR_ELT %opaque(<2 x s64>), [[C1]](s64)
+ ; CHECK-NEXT: $x0 = COPY %un1(s64)
+ ; CHECK-NEXT: $x1 = COPY %un2(s64)
+ %opaque:_(<2 x s64>) = COPY $q0
+ %un1:_(s64), %un2:_(s64) = G_UNMERGE_VALUES %opaque(<2 x s64>)
+ $x0 = COPY %un1(s64)
+ $x1 = COPY %un2(s64)
+...
+
+# Check that we don't unmerge the opaque vector into scalars
+---
+name: test_opaque_vector_vector
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_opaque_vector_vector
+ ; CHECK: %opaque:_(s128) = COPY $q0
+ ; CHECK-NEXT: %un1:_(s64), %un2:_(s64) = G_UNMERGE_VALUES %opaque(s128)
+ ; CHECK-NEXT: $x0 = COPY %un1(s64)
+ ; CHECK-NEXT: $x1 = COPY %un2(s64)
+ %opaque:_(s128) = COPY $q0
+ %un1:_(s64), %un2:_(s64) = G_UNMERGE_VALUES %opaque(s128)
+ $x0 = COPY %un1(s64)
+ $x1 = COPY %un2(s64)
+...
+
+# Check that we unmerge the long opaque vector into extract vector elt
+---
+name: test_long_opaque_vector_scalar
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_long_opaque_vector_scalar
+ ; CHECK: %opaque:_(<8 x s16>) = COPY $q0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: %un1:_(s16) = G_EXTRACT_VECTOR_ELT %opaque(<8 x s16>), [[C]](s64)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: %un2:_(s16) = G_EXTRACT_VECTOR_ELT %opaque(<8 x s16>), [[C1]](s64)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK-NEXT: %un3:_(s16) = G_EXTRACT_VECTOR_ELT %opaque(<8 x s16>), [[C2]](s64)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; CHECK-NEXT: %un4:_(s16) = G_EXTRACT_VECTOR_ELT %opaque(<8 x s16>), [[C3]](s64)
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK-NEXT: %un5:_(s16) = G_EXTRACT_VECTOR_ELT %opaque(<8 x s16>), [[C4]](s64)
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+ ; CHECK-NEXT: %un6:_(s16) = G_EXTRACT_VECTOR_ELT %opaque(<8 x s16>), [[C5]](s64)
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+ ; CHECK-NEXT: %un7:_(s16) = G_EXTRACT_VECTOR_ELT %opaque(<8 x s16>), [[C6]](s64)
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+ ; CHECK-NEXT: %un8:_(s16) = G_EXTRACT_VECTOR_ELT %opaque(<8 x s16>), [[C7]](s64)
+ ; CHECK-NEXT: %zext1:_(s32) = G_ZEXT %un1(s16)
+ ; CHECK-NEXT: %zext2:_(s32) = G_ZEXT %un2(s16)
+ ; CHECK-NEXT: %zext3:_(s32) = G_ZEXT %un3(s16)
+ ; CHECK-NEXT: %zext4:_(s32) = G_ZEXT %un4(s16)
+ ; CHECK-NEXT: %zext5:_(s32) = G_ZEXT %un5(s16)
+ ; CHECK-NEXT: %zext6:_(s32) = G_ZEXT %un6(s16)
+ ; CHECK-NEXT: %zext7:_(s32) = G_ZEXT %un7(s16)
+ ; CHECK-NEXT: %zext8:_(s32) = G_ZEXT %un8(s16)
+ ; CHECK-NEXT: $w0 = COPY %zext1(s32)
+ ; CHECK-NEXT: $w1 = COPY %zext2(s32)
+ ; CHECK-NEXT: $w0 = COPY %zext3(s32)
+ ; CHECK-NEXT: $w1 = COPY %zext4(s32)
+ ; CHECK-NEXT: $w0 = COPY %zext5(s32)
+ ; CHECK-NEXT: $w1 = COPY %zext6(s32)
+ ; CHECK-NEXT: $w0 = COPY %zext7(s32)
+ ; CHECK-NEXT: $w1 = COPY %zext8(s32)
+ %opaque:_(<8 x s16>) = COPY $q0
+ %un1:_(s16), %un2:_(s16), %un3:_(s16), %un4:_(s16), %un5:_(s16), %un6:_(s16), %un7:_(s16), %un8:_(s16) = G_UNMERGE_VALUES %opaque(<8 x s16>)
+ %zext1:_(s32) = G_ZEXT %un1
+ %zext2:_(s32) = G_ZEXT %un2
+ %zext3:_(s32) = G_ZEXT %un3
+ %zext4:_(s32) = G_ZEXT %un4
+ %zext5:_(s32) = G_ZEXT %un5
+ %zext6:_(s32) = G_ZEXT %un6
+ %zext7:_(s32) = G_ZEXT %un7
+ %zext8:_(s32) = G_ZEXT %un8
+ $w0 = COPY %zext1(s32)
+ $w1 = COPY %zext2(s32)
+ $w0 = COPY %zext3(s32)
+ $w1 = COPY %zext4(s32)
+ $w0 = COPY %zext5(s32)
+ $w1 = COPY %zext6(s32)
+ $w0 = COPY %zext7(s32)
+ $w1 = COPY %zext8(s32)
+...
+
diff --git a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
index f7aa57a068a4ce..4d75367fa06b49 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
@@ -590,14 +590,26 @@ entry:
}
define i16 @sminv_v3i16(<3 x i16> %a) {
-; CHECK-LABEL: sminv_v3i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov w8, #32767 // =0x7fff
-; CHECK-NEXT: mov v0.h[3], w8
-; CHECK-NEXT: sminv h0, v0.4h
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: sminv_v3i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: mov w8, #32767 // =0x7fff
+; CHECK-SD-NEXT: mov v0.h[3], w8
+; CHECK-SD-NEXT: sminv h0, v0.4h
+; CHECK-SD-NEXT: fmov w0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sminv_v3i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: mov v1.h[0], v0.h[0]
+; CHECK-GI-NEXT: mov w8, #32767 // =0x7fff
+; CHECK-GI-NEXT: mov v1.h[1], v0.h[1]
+; CHECK-GI-NEXT: mov v1.h[2], v0.h[2]
+; CHECK-GI-NEXT: mov v1.h[3], w8
+; CHECK-GI-NEXT: sminv h0, v1.4h
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: ret
entry:
%arg1 = call i16 @llvm.vector.reduce.smin.v3i16(<3 x i16> %a)
ret i16 %arg1
@@ -649,13 +661,24 @@ entry:
}
define i32 @sminv_v3i32(<3 x i32> %a) {
-; CHECK-LABEL: sminv_v3i32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #2147483647 // =0x7fffffff
-; CHECK-NEXT: mov v0.s[3], w8
-; CHECK-NEXT: sminv s0, v0.4s
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: sminv_v3i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov w8, #2147483647 // =0x7fffffff
+; CHECK-SD-NEXT: mov v0.s[3], w8
+; CHECK-SD-NEXT: sminv s0, v0.4s
+; CHECK-SD-NEXT: fmov w0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sminv_v3i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov v1.s[0], v0.s[0]
+; CHECK-GI-NEXT: mov w8, #2147483647 // =0x7fffffff
+; CHECK-GI-NEXT: mov v1.s[1], v0.s[1]
+; CHECK-GI-NEXT: mov v1.s[2], v0.s[2]
+; CHECK-GI-NEXT: mov v1.s[3], w8
+; CHECK-GI-NEXT: sminv s0, v1.4s
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: ret
entry:
%arg1 = call i32 @llvm.vector.reduce.smin.v3i32(<3 x i32> %a)
ret i32 %arg1
@@ -954,9 +977,12 @@ define i16 @smaxv_v3i16(<3 x i16> %a) {
; CHECK-GI-LABEL: smaxv_v3i16:
; ...
[truncated]
|
|
@llvm/pr-subscribers-backend-amdgpu Author: Thorsten Schütt (tschuett) Changes%opaque:_(<2 x s64>) = G_OPAQUE -> %zero:_(s64) = G_CONSTANT i64 0 unable to legalize instruction: %5:_(s128) = G_EXTRACT_VECTOR_ELT %3:_(<2 x s128>), %7:_(s64) (in function: fabs_v2f128) Test: Patch is 1.17 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113040.diff 94 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9240a3c3127eb4..87409c88788e6a 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -922,6 +922,10 @@ class CombinerHelper {
bool matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI,
BuildFnTy &MatchInfo);
+ // unmerge_values(opaque vector) -> extract vector elt
+ bool matchUnmergeValuesOfScalarAndVector(const MachineInstr &MI,
+ BuildFnTy &MatchInfo);
+
private:
/// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index ead4149fc11068..39dd58837d5750 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -840,6 +840,14 @@ def unmerge_anyext_build_vector : GICombineRule<
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])
>;
+// Transform unmerge opaque vector -> extract vector elt
+def unmerge_opaque_vector : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_UNMERGE_VALUES): $root,
+ [{ return Helper.matchUnmergeValuesOfScalarAndVector(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])
+>;
+
// Transform x,y = unmerge(zext(z)) -> x = zext z; y = 0.
def unmerge_zext_to_zext : GICombineRule<
(defs root:$d),
@@ -855,7 +863,8 @@ def merge_combines: GICombineGroup<[
unmerge_cst,
unmerge_undef,
unmerge_dead_to_trunc,
- unmerge_zext_to_zext
+ unmerge_zext_to_zext,
+ unmerge_opaque_vector
]>;
// Under certain conditions, transform:
diff --git a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
index af1717dbf76f39..a45024d120be68 100644
--- a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
+++ b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_component_library(LLVMGlobalISel
GlobalISel.cpp
Combiner.cpp
CombinerHelper.cpp
+ CombinerHelperArtifacts.cpp
CombinerHelperCasts.cpp
CombinerHelperCompares.cpp
CombinerHelperVectorOps.cpp
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index b7ddf9f479ef8e..f9b1621955c217 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -7611,85 +7611,3 @@ bool CombinerHelper::matchFoldAMinusC1PlusC2(const MachineInstr &MI,
return true;
}
-
-bool CombinerHelper::matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI,
- BuildFnTy &MatchInfo) {
- const GUnmerge *Unmerge = cast<GUnmerge>(&MI);
-
- if (!MRI.hasOneNonDBGUse(Unmerge->getSourceReg()))
- return false;
-
- const MachineInstr *Source = MRI.getVRegDef(Unmerge->getSourceReg());
-
- LLT DstTy = MRI.getType(Unmerge->getReg(0));
-
- // $bv:_(<8 x s8>) = G_BUILD_VECTOR ....
- // $any:_(<8 x s16>) = G_ANYEXT $bv
- // $uv:_(<4 x s16>), $uv1:_(<4 x s16>) = G_UNMERGE_VALUES $any
- //
- // ->
- //
- // $any:_(s16) = G_ANYEXT $bv[0]
- // $any1:_(s16) = G_ANYEXT $bv[1]
- // $any2:_(s16) = G_ANYEXT $bv[2]
- // $any3:_(s16) = G_ANYEXT $bv[3]
- // $any4:_(s16) = G_ANYEXT $bv[4]
- // $any5:_(s16) = G_ANYEXT $bv[5]
- // $any6:_(s16) = G_ANYEXT $bv[6]
- // $any7:_(s16) = G_ANYEXT $bv[7]
- // $uv:_(<4 x s16>) = G_BUILD_VECTOR $any, $any1, $any2, $any3
- // $uv1:_(<4 x s16>) = G_BUILD_VECTOR $any4, $any5, $any6, $any7
-
- // We want to unmerge into vectors.
- if (!DstTy.isFixedVector())
- return false;
-
- const GAnyExt *Any = dyn_cast<GAnyExt>(Source);
- if (!Any)
- return false;
-
- const MachineInstr *NextSource = MRI.getVRegDef(Any->getSrcReg());
-
- if (const GBuildVector *BV = dyn_cast<GBuildVector>(NextSource)) {
- // G_UNMERGE_VALUES G_ANYEXT G_BUILD_VECTOR
-
- if (!MRI.hasOneNonDBGUse(BV->getReg(0)))
- return false;
-
- // FIXME: check element types?
- if (BV->getNumSources() % Unmerge->getNumDefs() != 0)
- return false;
-
- LLT BigBvTy = MRI.getType(BV->getReg(0));
- LLT SmallBvTy = DstTy;
- LLT SmallBvElemenTy = SmallBvTy.getElementType();
-
- if (!isLegalOrBeforeLegalizer(
- {TargetOpcode::G_BUILD_VECTOR, {SmallBvTy, SmallBvElemenTy}}))
- return false;
-
- // We check the legality of scalar anyext.
- if (!isLegalOrBeforeLegalizer(
- {TargetOpcode::G_ANYEXT,
- {SmallBvElemenTy, BigBvTy.getElementType()}}))
- return false;
-
- MatchInfo = [=](MachineIRBuilder &B) {
- // Build into each G_UNMERGE_VALUES def
- // a small build vector with anyext from the source build vector.
- for (unsigned I = 0; I < Unmerge->getNumDefs(); ++I) {
- SmallVector<Register> Ops;
- for (unsigned J = 0; J < SmallBvTy.getNumElements(); ++J) {
- Register SourceArray =
- BV->getSourceReg(I * SmallBvTy.getNumElements() + J);
- auto AnyExt = B.buildAnyExt(SmallBvElemenTy, SourceArray);
- Ops.push_back(AnyExt.getReg(0));
- }
- B.buildBuildVector(Unmerge->getOperand(I).getReg(), Ops);
- };
- };
- return true;
- };
-
- return false;
-}
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperArtifacts.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperArtifacts.cpp
new file mode 100644
index 00000000000000..805d34ae0493c4
--- /dev/null
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperArtifacts.cpp
@@ -0,0 +1,169 @@
+//===- CombinerHelperArtifacts.cpp-----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements CombinerHelper for legalization artifacts.
+//
+//===----------------------------------------------------------------------===//
+//
+// G_UNMERGE_VALUES
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/Support/Casting.h"
+
+#define DEBUG_TYPE "gi-combiner"
+
+using namespace llvm;
+
+bool CombinerHelper::matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ const GUnmerge *Unmerge = cast<GUnmerge>(&MI);
+
+ if (!MRI.hasOneNonDBGUse(Unmerge->getSourceReg()))
+ return false;
+
+ const MachineInstr *Source = MRI.getVRegDef(Unmerge->getSourceReg());
+
+ LLT DstTy = MRI.getType(Unmerge->getReg(0));
+
+ // $bv:_(<8 x s8>) = G_BUILD_VECTOR ....
+ // $any:_(<8 x s16>) = G_ANYEXT $bv
+ // $uv:_(<4 x s16>), $uv1:_(<4 x s16>) = G_UNMERGE_VALUES $any
+ //
+ // ->
+ //
+ // $any:_(s16) = G_ANYEXT $bv[0]
+ // $any1:_(s16) = G_ANYEXT $bv[1]
+ // $any2:_(s16) = G_ANYEXT $bv[2]
+ // $any3:_(s16) = G_ANYEXT $bv[3]
+ // $any4:_(s16) = G_ANYEXT $bv[4]
+ // $any5:_(s16) = G_ANYEXT $bv[5]
+ // $any6:_(s16) = G_ANYEXT $bv[6]
+ // $any7:_(s16) = G_ANYEXT $bv[7]
+ // $uv:_(<4 x s16>) = G_BUILD_VECTOR $any, $any1, $any2, $any3
+ // $uv1:_(<4 x s16>) = G_BUILD_VECTOR $any4, $any5, $any6, $any7
+
+ // We want to unmerge into vectors.
+ if (!DstTy.isFixedVector())
+ return false;
+
+ const GAnyExt *Any = dyn_cast<GAnyExt>(Source);
+ if (!Any)
+ return false;
+
+ const MachineInstr *NextSource = MRI.getVRegDef(Any->getSrcReg());
+
+ if (const GBuildVector *BV = dyn_cast<GBuildVector>(NextSource)) {
+ // G_UNMERGE_VALUES G_ANYEXT G_BUILD_VECTOR
+
+ if (!MRI.hasOneNonDBGUse(BV->getReg(0)))
+ return false;
+
+ // FIXME: check element types?
+ if (BV->getNumSources() % Unmerge->getNumDefs() != 0)
+ return false;
+
+ LLT BigBvTy = MRI.getType(BV->getReg(0));
+ LLT SmallBvTy = DstTy;
+ LLT SmallBvElemenTy = SmallBvTy.getElementType();
+
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_BUILD_VECTOR, {SmallBvTy, SmallBvElemenTy}}))
+ return false;
+
+ // We check the legality of scalar anyext.
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_ANYEXT,
+ {SmallBvElemenTy, BigBvTy.getElementType()}}))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ // Build into each G_UNMERGE_VALUES def
+ // a small build vector with anyext from the source build vector.
+ for (unsigned I = 0; I < Unmerge->getNumDefs(); ++I) {
+ SmallVector<Register> Ops;
+ for (unsigned J = 0; J < SmallBvTy.getNumElements(); ++J) {
+ Register SourceArray =
+ BV->getSourceReg(I * SmallBvTy.getNumElements() + J);
+ auto AnyExt = B.buildAnyExt(SmallBvElemenTy, SourceArray);
+ Ops.push_back(AnyExt.getReg(0));
+ }
+ B.buildBuildVector(Unmerge->getOperand(I).getReg(), Ops);
+ };
+ };
+ return true;
+ };
+
+ return false;
+}
+
+bool CombinerHelper::matchUnmergeValuesOfScalarAndVector(const MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+
+ constexpr unsigned MAX_NUM_DEFS_LIMIT = 8;
+
+ // %opaque:_(<2 x s64>) = G_OPAQUE
+ // %un1:_(s64), %un2:_(s64) = G_UNMERGE_VALUES %opaque(<2 x s64>)
+ //
+ // ->
+ //
+ // %zero:_(s64) = G_CONSTANT i64 0
+ // %one:_(s64) = G_CONSTANT i64 1
+ // %un1:_(s64) = G_EXTRACT_VECTOR_ELT %opaque, $zero
+ // %un2:_(s64) = G_EXTRACT_VECTOR_ELT %opaque, $one
+
+ const GUnmerge *Unmerge = cast<GUnmerge>(&MI);
+
+ if (Unmerge->getNumDefs() > MAX_NUM_DEFS_LIMIT)
+ return false;
+
+ LLT DstTy = MRI.getType(Unmerge->getReg(0));
+ LLT SrcTy = MRI.getType(Unmerge->getSourceReg());
+
+ // We want to unmerge a vector into scalars.
+ if (!DstTy.isScalar() || !SrcTy.isFixedVector() || DstTy.getSizeInBits() > 64)
+ return false;
+
+ if (DstTy != SrcTy.getElementType())
+ return false;
+
+ // We want to unmerge from an opaque vector.
+ const MachineInstr *Source = MRI.getVRegDef(Unmerge->getSourceReg());
+ if (isa<GBuildVector>(Source))
+ return false;
+
+ unsigned PreferredVecIdxWidth =
+ getTargetLowering().getVectorIdxTy(getDataLayout()).getSizeInBits();
+
+ LLT IdxTy = LLT::scalar(PreferredVecIdxWidth);
+
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_EXTRACT_VECTOR_ELT, {DstTy, SrcTy, IdxTy}}))
+ return false;
+
+ if (!isConstantLegalOrBeforeLegalizer(IdxTy))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ for (unsigned I = 0; I < Unmerge->getNumDefs(); ++I) {
+ auto Index = B.buildConstant(IdxTy, I);
+ B.buildExtractVectorElement(Unmerge->getOperand(I).getReg(),
+ Unmerge->getSourceReg(), Index);
+ }
+ };
+
+ return true;
+}
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 8af8cdfeba6ac4..1eb7488e4ff570 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -322,7 +322,7 @@ def AArch64PostLegalizerCombiner
extractvecelt_pairwise_add, redundant_or,
mul_const, redundant_sext_inreg,
form_bitfield_extract, rotate_out_of_range,
- icmp_to_true_false_known_bits,
+ icmp_to_true_false_known_bits, vector_ops_combines,
select_combines, fold_merge_to_zext,
constant_fold_binops, identity_combines,
ptr_add_immed_chain, overlapping_and,
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir
index 7566d38e6c6cfa..fc7584a2e1b162 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir
@@ -422,9 +422,12 @@ body: |
; CHECK-LABEL: name: test_dont_combine_unmerge_zext_to_zext_src_vector
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $w0
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(<2 x s32>) = G_ZEXT [[COPY]](<2 x s16>)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](<2 x s32>)
- ; CHECK-NEXT: $w0 = COPY [[UV]](s32)
- ; CHECK-NEXT: $w1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[ZEXT]](<2 x s32>), [[C]](s64)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[ZEXT]](<2 x s32>), [[C1]](s64)
+ ; CHECK-NEXT: $w0 = COPY [[EVEC]](s32)
+ ; CHECK-NEXT: $w1 = COPY [[EVEC1]](s32)
%0:_(<2 x s16>) = COPY $w0
%3:_(<2 x s32>) = G_ZEXT %0(<2 x s16>)
%1:_(s32),%2:_(s32) = G_UNMERGE_VALUES %3(<2 x s32>)
@@ -539,3 +542,98 @@ body: |
$q0 = COPY %un1(s128)
$q1 = COPY %un2(s128)
...
+
+# Check that we unmerge the opaque vector into extract vector elt
+---
+name: test_opaque_vector_scalar
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_opaque_vector_scalar
+ ; CHECK: %opaque:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: %un1:_(s64) = G_EXTRACT_VECTOR_ELT %opaque(<2 x s64>), [[C]](s64)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: %un2:_(s64) = G_EXTRACT_VECTOR_ELT %opaque(<2 x s64>), [[C1]](s64)
+ ; CHECK-NEXT: $x0 = COPY %un1(s64)
+ ; CHECK-NEXT: $x1 = COPY %un2(s64)
+ %opaque:_(<2 x s64>) = COPY $q0
+ %un1:_(s64), %un2:_(s64) = G_UNMERGE_VALUES %opaque(<2 x s64>)
+ $x0 = COPY %un1(s64)
+ $x1 = COPY %un2(s64)
+...
+
+# Check that we don't unmerge the opaque vector into scalars
+---
+name: test_opaque_vector_vector
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_opaque_vector_vector
+ ; CHECK: %opaque:_(s128) = COPY $q0
+ ; CHECK-NEXT: %un1:_(s64), %un2:_(s64) = G_UNMERGE_VALUES %opaque(s128)
+ ; CHECK-NEXT: $x0 = COPY %un1(s64)
+ ; CHECK-NEXT: $x1 = COPY %un2(s64)
+ %opaque:_(s128) = COPY $q0
+ %un1:_(s64), %un2:_(s64) = G_UNMERGE_VALUES %opaque(s128)
+ $x0 = COPY %un1(s64)
+ $x1 = COPY %un2(s64)
+...
+
+# Check that we unmerge the long opaque vector into extract vector elt
+---
+name: test_long_opaque_vector_scalar
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_long_opaque_vector_scalar
+ ; CHECK: %opaque:_(<8 x s16>) = COPY $q0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: %un1:_(s16) = G_EXTRACT_VECTOR_ELT %opaque(<8 x s16>), [[C]](s64)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: %un2:_(s16) = G_EXTRACT_VECTOR_ELT %opaque(<8 x s16>), [[C1]](s64)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK-NEXT: %un3:_(s16) = G_EXTRACT_VECTOR_ELT %opaque(<8 x s16>), [[C2]](s64)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; CHECK-NEXT: %un4:_(s16) = G_EXTRACT_VECTOR_ELT %opaque(<8 x s16>), [[C3]](s64)
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK-NEXT: %un5:_(s16) = G_EXTRACT_VECTOR_ELT %opaque(<8 x s16>), [[C4]](s64)
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+ ; CHECK-NEXT: %un6:_(s16) = G_EXTRACT_VECTOR_ELT %opaque(<8 x s16>), [[C5]](s64)
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+ ; CHECK-NEXT: %un7:_(s16) = G_EXTRACT_VECTOR_ELT %opaque(<8 x s16>), [[C6]](s64)
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+ ; CHECK-NEXT: %un8:_(s16) = G_EXTRACT_VECTOR_ELT %opaque(<8 x s16>), [[C7]](s64)
+ ; CHECK-NEXT: %zext1:_(s32) = G_ZEXT %un1(s16)
+ ; CHECK-NEXT: %zext2:_(s32) = G_ZEXT %un2(s16)
+ ; CHECK-NEXT: %zext3:_(s32) = G_ZEXT %un3(s16)
+ ; CHECK-NEXT: %zext4:_(s32) = G_ZEXT %un4(s16)
+ ; CHECK-NEXT: %zext5:_(s32) = G_ZEXT %un5(s16)
+ ; CHECK-NEXT: %zext6:_(s32) = G_ZEXT %un6(s16)
+ ; CHECK-NEXT: %zext7:_(s32) = G_ZEXT %un7(s16)
+ ; CHECK-NEXT: %zext8:_(s32) = G_ZEXT %un8(s16)
+ ; CHECK-NEXT: $w0 = COPY %zext1(s32)
+ ; CHECK-NEXT: $w1 = COPY %zext2(s32)
+ ; CHECK-NEXT: $w0 = COPY %zext3(s32)
+ ; CHECK-NEXT: $w1 = COPY %zext4(s32)
+ ; CHECK-NEXT: $w0 = COPY %zext5(s32)
+ ; CHECK-NEXT: $w1 = COPY %zext6(s32)
+ ; CHECK-NEXT: $w0 = COPY %zext7(s32)
+ ; CHECK-NEXT: $w1 = COPY %zext8(s32)
+ %opaque:_(<8 x s16>) = COPY $q0
+ %un1:_(s16), %un2:_(s16), %un3:_(s16), %un4:_(s16), %un5:_(s16), %un6:_(s16), %un7:_(s16), %un8:_(s16) = G_UNMERGE_VALUES %opaque(<8 x s16>)
+ %zext1:_(s32) = G_ZEXT %un1
+ %zext2:_(s32) = G_ZEXT %un2
+ %zext3:_(s32) = G_ZEXT %un3
+ %zext4:_(s32) = G_ZEXT %un4
+ %zext5:_(s32) = G_ZEXT %un5
+ %zext6:_(s32) = G_ZEXT %un6
+ %zext7:_(s32) = G_ZEXT %un7
+ %zext8:_(s32) = G_ZEXT %un8
+ $w0 = COPY %zext1(s32)
+ $w1 = COPY %zext2(s32)
+ $w0 = COPY %zext3(s32)
+ $w1 = COPY %zext4(s32)
+ $w0 = COPY %zext5(s32)
+ $w1 = COPY %zext6(s32)
+ $w0 = COPY %zext7(s32)
+ $w1 = COPY %zext8(s32)
+...
+
diff --git a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
index f7aa57a068a4ce..4d75367fa06b49 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
@@ -590,14 +590,26 @@ entry:
}
define i16 @sminv_v3i16(<3 x i16> %a) {
-; CHECK-LABEL: sminv_v3i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov w8, #32767 // =0x7fff
-; CHECK-NEXT: mov v0.h[3], w8
-; CHECK-NEXT: sminv h0, v0.4h
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: sminv_v3i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: mov w8, #32767 // =0x7fff
+; CHECK-SD-NEXT: mov v0.h[3], w8
+; CHECK-SD-NEXT: sminv h0, v0.4h
+; CHECK-SD-NEXT: fmov w0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sminv_v3i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: mov v1.h[0], v0.h[0]
+; CHECK-GI-NEXT: mov w8, #32767 // =0x7fff
+; CHECK-GI-NEXT: mov v1.h[1], v0.h[1]
+; CHECK-GI-NEXT: mov v1.h[2], v0.h[2]
+; CHECK-GI-NEXT: mov v1.h[3], w8
+; CHECK-GI-NEXT: sminv h0, v1.4h
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: ret
entry:
%arg1 = call i16 @llvm.vector.reduce.smin.v3i16(<3 x i16> %a)
ret i16 %arg1
@@ -649,13 +661,24 @@ entry:
}
define i32 @sminv_v3i32(<3 x i32> %a) {
-; CHECK-LABEL: sminv_v3i32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #2147483647 // =0x7fffffff
-; CHECK-NEXT: mov v0.s[3], w8
-; CHECK-NEXT: sminv s0, v0.4s
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: sminv_v3i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov w8, #2147483647 // =0x7fffffff
+; CHECK-SD-NEXT: mov v0.s[3], w8
+; CHECK-SD-NEXT: sminv s0, v0.4s
+; CHECK-SD-NEXT: fmov w0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sminv_v3i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov v1.s[0], v0.s[0]
+; CHECK-GI-NEXT: mov w8, #2147483647 // =0x7fffffff
+; CHECK-GI-NEXT: mov v1.s[1], v0.s[1]
+; CHECK-GI-NEXT: mov v1.s[2], v0.s[2]
+; CHECK-GI-NEXT: mov v1.s[3], w8
+; CHECK-GI-NEXT: sminv s0, v1.4s
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: ret
entry:
%arg1 = call i32 @llvm.vector.reduce.smin.v3i32(<3 x i32> %a)
ret i32 %arg1
@@ -954,9 +977,12 @@ define i16 @smaxv_v3i16(<3 x i16> %a) {
; CHECK-GI-LABEL: smaxv_v3i16:
; ...
[truncated]
|
|
Looks worse than expected. |
arsenm
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Reword description, there is no G_OPAQUE?
| ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[UV]](s32) | ||
| ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) | ||
| ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) | ||
| ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) | ||
| ; GFX10-NEXT: $vgpr0 = COPY [[COPY15]](s32) | ||
| ; GFX10-NEXT: $vgpr1 = COPY [[COPY16]](s32) | ||
| ; GFX10-NEXT: $vgpr2 = COPY [[COPY17]](s32) | ||
| ; GFX10-NEXT: $vgpr3 = COPY [[COPY18]](s32) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ugly avoidable copy
| ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v4 | ||
| ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v3, v2 | ||
| ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 | ||
| ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Pretty bad
| return true; | ||
| } | ||
|
|
||
| bool CombinerHelper::matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be easier to review if the diff was local to the file, and this code wasn't moved to another file
|
I am open to suggestions, but |
|
The regressions are too hard. |
|
I am not accepting my contribution. We have existing G_UNMERGE_VALUES combines that take the source into account: G_CONSTANT, G_BUILD_VECTOR, anyext of G_BUILD_VECTOR, and more. Here, probably for the first time, I am trying to blindly kill an unmerge without taking the source into account. It is not profitable. I am trying to legalize G_EXTRACT_SUBVECTOR. I already have the code to combine an unmerge without taking the source into account. It is probably better to invest in unmerge combines that take the source into account. |
%opaque:_(<2 x s64>) = G_OPAQUE
%un1:_(s64), %un2:_(s64) = G_UNMERGE_VALUES %opaque(<2 x s64>)
->
%zero:_(s64) = G_CONSTANT i64 0
%one:_(s64) = G_CONSTANT i64 1
%un1:_(s64) = G_EXTRACT_VECTOR_ELT %opaque, $zero
%un2:_(s64) = G_EXTRACT_VECTOR_ELT %opaque, $one
unable to legalize instruction: %5:_(s128) = G_EXTRACT_VECTOR_ELT %3:_(<2 x s128>), %7:_(s64) (in function: fabs_v2f128)
Test:
llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir